* Prepare BvD-year dataset of firm-id and patent variables
* ---------------------------------------------------------

* Build the firm-country-year panel of baseline regression firms and prepare
* the sequential firm ids used for the randomization.
* Only the variables listed here are read from the regression dataset.
use BvDIDnumber year industry ///
    k${depvar}_${ttt} k${depvar}_${ttt}0 ///
    kNOT_${depvar}_${ttt} kNOT_${depvar}_${ttt}0 ///
    spill${depvar}${ttt}_1995_a spillN${depvar}${ttt}_1995_a ///
    spill${depvar}${ttt}_1995_a0 spillN${depvar}${ttt}_1995_a0 ///
    ${depvar}_${ttt} country_shr_1995 ///
    missing_weights_1995 maxweight_1995 missing_spill_weights_1995 ///
    using ${final_dir}/regression_dataset${weight_window}${tf}.dta, clear

* Keep only firms that appear in the baseline regression firm list
* (unmatched(none) discards observations without a match on either side),
* then restrict the panel to 1995-2011.
mmerge BvD using ${final_dir}/bvd_list_regfirms_auto95.dta, unmatched(none)
keep if inrange(year, 1995, 2011)
drop _merge

* Number the firms 1, 2, 3, ... following the (country_shr_1995, BvDIDnumber)
* sort order: flag the first row of every firm with 1 (0 elsewhere) and take
* the running sum of that flag, so all rows of a firm share the same id.
sort country_shr_1995 BvDIDnumber year
by country_shr_1995 BvDIDnumber: gen firm_id_og = (_n == 1)
replace firm_id_og = sum(firm_id_og)
save ${monte_data_dir}/bvd_year_firmdata_firmweightdomestic_`rname'.dta, replace

preserve
* Reduce the panel to one row per firm and home country
keep BvDIDnumber firm_id_og country_shr_1995
duplicates drop
* For every home country record the lowest (first_id) and highest (last_id)
* firm_id_og among its firms.
* The bounds are computed with egen min()/max() instead of reading firm_id_og
* off the first/last row of each by-group: if bysort has to re-sort the data on
* country_shr_1995 alone, the order of rows within a country is not guaranteed
* (Stata's sort is not stable), so the first/last row need not hold the
* smallest/largest id.
bysort country_shr_1995: egen first_id = min(firm_id_og)
bysort country_shr_1995: egen last_id = max(firm_id_og)
sort firm_id_og
save ${monte_data_dir}/bvd_hid_list_firmweightdomestic_`rname'.dta, replace
restore

* Prepare BvD-year dataset of all weighted variables
* --------------------------------------------------
* Start from the firm-year rows in memory and keep only the identifiers:
* the sequential firm id (renamed to firm_id), the BvD id and the year.
rename firm_id_og firm_id
keep firm_id BvDIDnumber year
sort firm_id year

* Attach the weighted variables listed in ukeep() from the regression dataset.
* unmatched(master) keeps every firm-year row in memory, whether or not it
* finds a match, and adds no rows that exist only in the using file.
mmerge BvDIDnumber year using ${final_dir}/regression_dataset${weight_window}${tf}.dta, ///
    ukeep(lswMPm_1995_a hswMPm_1995_a vaempMPm_1995_a gdppcMPm_1995_a ///
          lngdpgap_1995_a lngdpgap_shr_foreign_1995_a ///
          hswMPm_shr4_foreign_1995_a lswMPm_shr4_foreign_1995_a ///
          gdppcMPm_shr4_foreign_1995_a vaempMPm_shr4_foreign_1995_a) ///
    unmatched(master)

* The BvD id was only needed as a merge key; the saved file is keyed on
* firm_id and year.
drop _merge BvDIDnumber
sort firm_id year
save ${monte_data_dir}/bvd_year_macrodata_firmweightdomestic_`rname'.dta, replace